library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)
library(ggtree)
## ggtree v3.10.1 For help: https://yulab-smu.top/treedata-book/
##
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
##
## Guangchuang Yu, David Smith, Huachen Zhu, Yi Guan, Tommy Tsan-Yuk Lam.
## ggtree: an R package for visualization and annotation of phylogenetic
## trees with their covariates and other associated data. Methods in
## Ecology and Evolution. 2017, 8(1):28-36. doi:10.1111/2041-210X.12628
##
## G Yu. Data Integration, Manipulation and Visualization of Phylogenetic
## Trees (1st ed.). Chapman and Hall/CRC. 2022. ISBN: 9781032233574
##
## Shuangbin Xu, Lin Li, Xiao Luo, Meijun Chen, Wenli Tang, Li Zhan, Zehan
## Dai, Tommy T. Lam, Yi Guan, Guangchuang Yu. Ggtree: A serialized data
## object for visualization of a phylogenetic tree and annotation data.
## iMeta 2022, 1(4):e56. doi:10.1002/imt2.56
##
## Attaching package: 'ggtree'
##
## The following object is masked from 'package:tidyr':
##
## expand
library(TDbook)
library(ggimage)
library(rphylopic)
## You are using rphylopic v.1.4.0. Please remember to credit PhyloPic contributors (hint: `get_attribution()`) and cite rphylopic in your work (hint: `citation("rphylopic")`).
##
## Attaching package: 'rphylopic'
##
## The following object is masked from 'package:ggimage':
##
## geom_phylopic
library(treeio)
## treeio v1.26.0 For help: https://yulab-smu.top/treedata-book/
##
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
##
## LG Wang, TTY Lam, S Xu, Z Dai, L Zhou, T Feng, P Guo, CW Dunn, BR
## Jones, T Bradley, H Zhu, Y Guan, Y Jiang, G Yu. treeio: an R package
## for phylogenetic tree input and output with richly annotated and
## associated data. Molecular Biology and Evolution. 2020, 37(2):599-603.
## doi: 10.1093/molbev/msz240
##
## Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods
## for mapping and visualizing associated data on phylogeny using ggtree.
## Molecular Biology and Evolution. 2018, 35(12):3041-3043.
## doi:10.1093/molbev/msy194
##
## Guangchuang Yu. Using ggtree to visualize data on tree-like structures.
## Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
library(tidytree)
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
##
## Guangchuang Yu, Tommy Tsan-Yuk Lam, Huachen Zhu, Yi Guan. Two methods
## for mapping and visualizing associated data on phylogeny using ggtree.
## Molecular Biology and Evolution. 2018, 35(12):3041-3043.
## doi:10.1093/molbev/msy194
##
## Guangchuang Yu. Using ggtree to visualize data on tree-like structures.
## Current Protocols in Bioinformatics. 2020, 69:e96. doi:10.1002/cpbi.96
##
## Attaching package: 'tidytree'
##
## The following object is masked from 'package:treeio':
##
## getNodeNum
##
## The following object is masked from 'package:stats':
##
## filter
library(ape)
##
## Attaching package: 'ape'
##
## The following objects are masked from 'package:tidytree':
##
## drop.tip, keep.tip
##
## The following object is masked from 'package:treeio':
##
## drop.tip
##
## The following object is masked from 'package:ggtree':
##
## rotate
##
## The following object is masked from 'package:dplyr':
##
## where
library(TreeTools)
##
## Attaching package: 'TreeTools'
##
## The following object is masked from 'package:tidytree':
##
## MRCA
##
## The following object is masked from 'package:treeio':
##
## MRCA
##
## The following object is masked from 'package:ggtree':
##
## MRCA
library(phytools)
## Loading required package: maps
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
##
##
## Attaching package: 'phytools'
##
## The following object is masked from 'package:TreeTools':
##
## as.multiPhylo
##
## The following object is masked from 'package:treeio':
##
## read.newick
library(ggnewscale)
library(ggtreeExtra)
## ggtreeExtra v1.12.0 For help: https://yulab-smu.top/treedata-book/
##
## If you use the ggtree package suite in published research, please cite
## the appropriate paper(s):
##
## S Xu, Z Dai, P Guo, X Fu, S Liu, L Zhou, W Tang, T Feng, M Chen, L
## Zhan, T Wu, E Hu, Y Jiang, X Bo, G Yu. ggtreeExtra: Compact
## visualization of richly annotated phylogenetic data. Molecular Biology
## and Evolution. 2021, 38(9):4039-4042. doi: 10.1093/molbev/msab166
library(ggstar)
library(data.table)
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
##
## The following object is masked from 'package:purrr':
##
## transpose
# Read the NEON MAGs table (GOLD study Gs0161344 export) into a tibble.
NEON_MAGs <- read_csv(file = "data/NEON/GOLD_Study_ID_Gs0161344_NEON.csv")
## Rows: 1754 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Bin ID, Genome Name, Bin Quality, Bin Lineage, GTDB-Tk Taxonomy L...
## dbl (10): IMG Genome ID, Bin Completeness, Bin Contamination, Total Number ...
## date (1): Date Added
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the table.
head(NEON_MAGs, n = 6L)
## # A tibble: 6 × 19
## `Bin ID` `Genome Name` `IMG Genome ID` `Bin Quality` `Bin Lineage`
## <chr> <chr> <dbl> <chr> <chr>
## 1 3300060643_14 Terrestrial soil mi… 3300060643 MQ <NA>
## 2 3300060643_16 Terrestrial soil mi… 3300060643 MQ Bacteria
## 3 3300060643_18 Terrestrial soil mi… 3300060643 MQ Bacteria; Ac…
## 4 3300060643_2 Terrestrial soil mi… 3300060643 MQ Bacteria; Ac…
## 5 3300060643_28 Terrestrial soil mi… 3300060643 MQ Bacteria; Ps…
## 6 3300060643_35 Terrestrial soil mi… 3300060643 MQ Bacteria; Ac…
## # ℹ 14 more variables: `GTDB-Tk Taxonomy Lineage` <chr>, `Bin Methods` <chr>,
## # `Created By` <chr>, `Date Added` <date>, `Bin Completeness` <dbl>,
## # `Bin Contamination` <dbl>, `Total Number of Bases` <dbl>, `5s rRNA` <dbl>,
## # `16s rRNA` <dbl>, `23s rRNA` <dbl>, `tRNA Genes` <dbl>, `Gene Count` <dbl>,
## # `Scaffold Count` <dbl>, `GOLD Study ID` <chr>
# Show each column's name, type and leading values.
str(object = NEON_MAGs)
## spc_tbl_ [1,754 × 19] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Bin ID : chr [1:1754] "3300060643_14" "3300060643_16" "3300060643_18" "3300060643_2" ...
## $ Genome Name : chr [1:1754] "Terrestrial soil microbial communities from National Grasslands LBJ, Texas, USA - CLBJ_001-M-20210506-comp-1" "Terrestrial soil microbial communities from National Grasslands LBJ, Texas, USA - CLBJ_001-M-20210506-comp-1" "Terrestrial soil microbial communities from National Grasslands LBJ, Texas, USA - CLBJ_001-M-20210506-comp-1" "Terrestrial soil microbial communities from National Grasslands LBJ, Texas, USA - CLBJ_001-M-20210506-comp-1" ...
## $ IMG Genome ID : num [1:1754] 3.3e+09 3.3e+09 3.3e+09 3.3e+09 3.3e+09 ...
## $ Bin Quality : chr [1:1754] "MQ" "MQ" "MQ" "MQ" ...
## $ Bin Lineage : chr [1:1754] NA "Bacteria" "Bacteria; Actinomycetota; Actinomycetes" "Bacteria; Actinomycetota; Actinomycetes" ...
## $ GTDB-Tk Taxonomy Lineage: chr [1:1754] "Bacteria; Acidobacteriota; Blastocatellia; Pyrinomonadales; Pyrinomonadaceae; PSRF01" "Bacteria; Acidobacteriota; Vicinamibacteria; Vicinamibacterales; UBA2999; Gp6-AA45" "Bacteria; Actinobacteriota; Actinomycetia; Streptosporangiales; Streptosporangiaceae; Chersky-822" "Bacteria; Actinobacteriota; Actinomycetia; Mycobacteriales; Jatrophihabitantaceae; JAFAWL01" ...
## $ Bin Methods : chr [1:1754] "MetaBAT v2:2.15, CheckM v1.2.1, GTDB-tk v2.1.1, GTDB database release R207_v2" "MetaBAT v2:2.15, CheckM v1.2.1, GTDB-tk v2.1.1, GTDB database release R207_v2" "MetaBAT v2:2.15, CheckM v1.2.1, GTDB-tk v2.1.1, GTDB database release R207_v2" "MetaBAT v2:2.15, CheckM v1.2.1, GTDB-tk v2.1.1, GTDB database release R207_v2" ...
## $ Created By : chr [1:1754] "IMG_PIPELINE" "IMG_PIPELINE" "IMG_PIPELINE" "IMG_PIPELINE" ...
## $ Date Added : Date[1:1754], format: "2023-04-06" "2023-04-06" ...
## $ Bin Completeness : num [1:1754] 96.2 77.5 77.2 58.4 68.7 ...
## $ Bin Contamination : num [1:1754] 2.56 5.3 1.99 3.74 4.67 0 2.97 3.16 1.71 5.17 ...
## $ Total Number of Bases : num [1:1754] 6247032 5394623 4389455 3228217 3245901 ...
## $ 5s rRNA : num [1:1754] 0 0 0 0 0 1 3 0 1 0 ...
## $ 16s rRNA : num [1:1754] 1 0 0 0 0 0 1 1 0 0 ...
## $ 23s rRNA : num [1:1754] 0 0 0 0 0 1 1 0 1 0 ...
## $ tRNA Genes : num [1:1754] 54 32 35 29 12 26 24 37 47 34 ...
## $ Gene Count : num [1:1754] 5373 5406 4705 3762 3446 ...
## $ Scaffold Count : num [1:1754] 39 878 607 592 474 386 270 547 10 186 ...
## $ GOLD Study ID : chr [1:1754] "Gs0161344" "Gs0161344" "Gs0161344" "Gs0161344" ...
## - attr(*, "spec")=
## .. cols(
## .. `Bin ID` = col_character(),
## .. `Genome Name` = col_character(),
## .. `IMG Genome ID` = col_double(),
## .. `Bin Quality` = col_character(),
## .. `Bin Lineage` = col_character(),
## .. `GTDB-Tk Taxonomy Lineage` = col_character(),
## .. `Bin Methods` = col_character(),
## .. `Created By` = col_character(),
## .. `Date Added` = col_date(format = ""),
## .. `Bin Completeness` = col_double(),
## .. `Bin Contamination` = col_double(),
## .. `Total Number of Bases` = col_double(),
## .. `5s rRNA` = col_double(),
## .. `16s rRNA` = col_double(),
## .. `23s rRNA` = col_double(),
## .. `tRNA Genes` = col_double(),
## .. `Gene Count` = col_double(),
## .. `Scaffold Count` = col_double(),
## .. `GOLD Study ID` = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
# Keep only the rows whose genome name is not the combined assembly.
NEON_MAGs_Ind <- filter(
  NEON_MAGs,
  `Genome Name` != "NEON combined assembly"
)

# Split the GTDB-Tk lineage string into one column per taxonomic rank while
# keeping the original lineage column.
NEON_MAGs_Ind_tax <- separate(
  NEON_MAGs_Ind,
  col = `GTDB-Tk Taxonomy Lineage`,
  into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus"),
  sep = "; ",
  remove = FALSE
)
## Warning: Expected 6 pieces. Additional pieces discarded in 21 rows [12, 32, 66, 79, 80,
## 88, 96, 102, 104, 240, 334, 386, 657, 790, 846, 931, 943, 983, 1041, 1095,
## ...].
## Warning: Expected 6 pieces. Missing pieces filled with `NA` in 282 rows [6, 7, 42, 49,
## 50, 55, 60, 83, 85, 97, 100, 105, 107, 113, 114, 116, 119, 125, 129, 130, ...].
# Tabulate the number of MAGs per phylum, most frequent phylum first.
NEON_MAGs_Ind_tax %>%
  count(Phylum, sort = TRUE) %>%
  kable()
## |Phylum              |   n|
## |:-------------------|---:|
## |Actinobacteriota    | 418|
## |Proteobacteria      | 248|
## |Acidobacteriota     | 181|
## |Verrucomicrobiota   |  57|
## |NA                  |  38|
## |Chloroflexota       |  35|
## |Myxococcota         |  29|
## |Bacteroidota        |  22|
## |Gemmatimonadota     |  16|
## |Methylomirabilota   |  16|
## |Planctomycetota     |  16|
## |Dormibacterota      |  11|
## |Eremiobacterota     |  11|
## |Desulfobacterota_B  |   9|
## |Desulfobacterota    |   5|
## |Patescibacteria     |   5|
## |Tectomicrobia       |   3|
## |Cyanobacteria       |   2|
## |Myxococcota_A       |   2|
## |Armatimonadota      |   1|
## |Chlamydiota         |   1|
## |Eisenbacteria       |   1|
## |Firmicutes          |   1|
## |Krumholzibacteriota |   1|
## |Nitrospirota        |   1|
# Re-read the MAG table (overwriting the earlier NEON_MAGs) and derive the
# assembly type, the taxonomy ranks and the sample metadata columns from the
# raw string columns.
NEON_MAGs <- read_csv("data/NEON/GOLD_Study_ID_Gs0161344_NEON.csv") %>%
  # Remove columns that are not needed for data analysis
  select(-c(`GOLD Study ID`, `Bin Methods`, `Created By`, `Date Added`)) %>%
  # Label each row "Combined" (combined assembly) or "Individual"
  mutate(
    `Assembly Type` = case_when(
      `Genome Name` == "NEON combined assembly" ~ "Combined",
      TRUE ~ "Individual"
    )
  ) %>%
  # Split the GTDB-Tk lineage into one column per rank, keeping the original
  separate(
    `GTDB-Tk Taxonomy Lineage`,
    into = c("Domain", "Phylum", "Class", "Order", "Family", "Genus"),
    sep = "; ",
    remove = FALSE
  ) %>%
  # Strip the common prefix "Terrestrial soil microbial communities from "
  mutate(
    `Genome Name` = str_replace(
      `Genome Name`,
      "Terrestrial soil microbial communities from ",
      ""
    )
  ) %>%
  # Split on " - " into Site and Sample Name; rows without that separator
  # get NA for Sample Name
  separate(`Genome Name`, into = c("Site", "Sample Name"), sep = " - ") %>%
  # Strip the common suffix "-comp-1"
  mutate(`Sample Name` = str_replace(`Sample Name`, "-comp-1", "")) %>%
  # Split the Sample Name on "_" into Site ID and plot info, keeping Sample Name
  separate(
    `Sample Name`,
    into = c("Site ID", "subplot.layer.date"),
    sep = "_",
    remove = FALSE
  ) %>%
  # Split the plot info on "-" into three columns
  separate(
    `subplot.layer.date`,
    into = c("Subplot", "Layer", "Date"),
    sep = "-"
  )
## Rows: 1754 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): Bin ID, Genome Name, Bin Quality, Bin Lineage, GTDB-Tk Taxonomy L...
## dbl (10): IMG Genome ID, Bin Completeness, Bin Contamination, Total Number ...
## date (1): Date Added
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## Warning: Expected 6 pieces. Additional pieces discarded in 29 rows [12, 32, 66, 79, 80,
## 88, 96, 102, 104, 240, 334, 386, 657, 790, 846, 931, 943, 983, 1041, 1095,
## ...].
## Warning: Expected 6 pieces. Missing pieces filled with `NA` in 429 rows [6, 7, 42, 49,
## 50, 55, 60, 83, 85, 97, 100, 105, 107, 113, 114, 116, 119, 125, 129, 130, ...].
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 624 rows [1131, 1132,
## 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145,
## 1146, 1147, 1148, 1149, 1150, ...].
# Bacterial MAGs that come from individual (non-combined) assemblies.
NEON_MAGs_bact_ind <- filter(
  NEON_MAGs,
  Domain == "Bacteria",
  `Assembly Type` == "Individual"
)
# Number of MAGs per phylum (horizontal bars).
ggplot(NEON_MAGs_bact_ind, aes(x = Phylum)) +
  geom_bar() +
  coord_flip()

# Same counts with phyla ordered by frequency and bars filled by site.
ggplot(
  NEON_MAGs_bact_ind,
  aes(x = fct_rev(fct_infreq(Phylum)), fill = Site)
) +
  geom_bar() +
  coord_flip()

# Distribution of genome size (total bases) per phylum; x labels are rotated
# so the phylum names do not overlap.
ggplot(
  NEON_MAGs_bact_ind,
  aes(x = fct_infreq(Phylum), y = `Total Number of Bases`)
) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

# Number of MAGs per site, bars filled by phylum.
ggplot(NEON_MAGs_bact_ind, aes(x = Site, fill = Phylum)) +
  geom_bar() +
  coord_flip()

# Gene count against total bases, coloured by phylum; coord_flip() swaps the
# two axes on the rendered plot.
ggplot(
  NEON_MAGs_bact_ind,
  aes(x = `Total Number of Bases`, y = `Gene Count`, color = Phylum)
) +
  geom_point() +
  coord_flip()

# MAGs whose Site mentions the Guanica State Forest reserve.
NEON_MAGs_GSF <- filter(
  NEON_MAGs,
  str_detect(Site, "Guanica State Forest and Biosphere Reserve, Puerto Rico")
)

# MAGs whose GTDB-Tk lineage mentions Dormibacterota.
NEON_MAGs_D <- filter(
  NEON_MAGs,
  str_detect(`GTDB-Tk Taxonomy Lineage`, "Dormibacterota")
)

# Number of Guanica MAGs per bin lineage (horizontal bars).
ggplot(NEON_MAGs_GSF, aes(x = `Bin Lineage`)) +
  geom_bar() +
  coord_flip() +
  labs(title = "Bin Lineage Counts")

# Read the IMG metagenome export and give the name column the same name as in
# the MAG table.
NEON_metagenomes <- read_tsv("data/NEON/exported_img_data_Gs0161344_NEON.tsv") %>%
  rename(`Genome Name` = `Genome Name / Sample Name`) %>%
  # Drop rows whose name mentions "re-annotation" or "WREF plot"
  filter(str_detect(`Genome Name`, "re-annotation", negate = TRUE)) %>%
  filter(str_detect(`Genome Name`, "WREF plot", negate = TRUE))
## Rows: 176 Columns: 46
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (18): Domain, Sequencing Status, Study Name, Genome Name / Sample Name, ...
## dbl (16): taxon_oid, IMG Genome ID, Depth In Meters, Elevation In Meters, Ge...
## lgl (12): Altitude In Meters, Chlorophyll Concentration, Longhurst Code, Lon...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Derive Site, Sample Name, Site ID, Subplot, Layer and Date from the genome
# name string.
NEON_metagenomes <- NEON_metagenomes %>%
  # Strip the common prefix "Terrestrial soil microbial communities from "
  mutate(
    `Genome Name` = str_replace(
      `Genome Name`,
      "Terrestrial soil microbial communities from ",
      ""
    )
  ) %>%
  # Split on " - " into Site and Sample Name; rows without that separator
  # get NA for Sample Name
  separate(`Genome Name`, into = c("Site", "Sample Name"), sep = " - ") %>%
  # Strip the common suffix "-comp-1"
  mutate(`Sample Name` = str_replace(`Sample Name`, "-comp-1", "")) %>%
  # Split the Sample Name on "_" into Site ID and plot info, keeping Sample Name
  separate(
    `Sample Name`,
    into = c("Site ID", "subplot.layer.date"),
    sep = "_",
    remove = FALSE
  ) %>%
  # Split the plot info on "-" into three columns
  separate(
    `subplot.layer.date`,
    into = c("Subplot", "Layer", "Date"),
    sep = "-"
  )
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [53].
# Read the plot-level soil chemistry metadata.
NEON_chemistry <- read_tsv("data/NEON/neon_plot_soilChem1_metadata.tsv") %>%
  # Remove "-COMP" from genomicsSampleID; this column is later used as the
  # join key against `Sample Name`
  mutate(genomicsSampleID = str_replace(genomicsSampleID, "-COMP", ""))
## Rows: 87 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (5): genomicsSampleID, siteID, plotID, nlcdClass, horizon
## dbl (11): decimalLatitude, decimalLongitude, elevation, soilTemp, d15N, org...
## date (1): collectionDate
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Attach the metagenome metadata and the soil chemistry to every MAG row,
# matching on the sample name. Columns present in both NEON_MAGs and
# NEON_metagenomes (e.g. Site) come out with .x / .y suffixes.
NEON_FULL <- NEON_MAGs %>%
  left_join(NEON_metagenomes, by = join_by(`Sample Name`)) %>%
  left_join(NEON_chemistry, by = join_by(`Sample Name` == genomicsSampleID))

# Rows of the joined table whose Phylum mentions Dormibacterota.
NEON_FULL_D <- filter(NEON_FULL, str_detect(Phylum, "Dormibacterota"))
# Soil pH (measured in water) per site for the Dormibacterota rows; x labels
# are rotated so the site names do not overlap.
ggplot(NEON_FULL_D, aes(x = Site.x, y = soilInWaterpH)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 50, vjust = 1, hjust = 1))
## Warning: Removed 11 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Number of Dormibacterota rows at each distinct bin contamination value.
ggplot(NEON_FULL_D, aes(x = `Bin Contamination`)) +
  geom_bar() +
  labs(title = "Bin Contamination Counts")

# Load the GTDB-Tk decorated archaeal and bacterial trees.
tree_arc <- read.tree("data/NEON/gtdbtk.ar53.decorated.tree")
tree_bac <- read.tree("data/NEON/gtdbtk.bac120.decorated.tree")
# All labels of the bacterial tree: tip labels first, then internal-node labels.
node_vector_bac <- c(tree_bac$tip.label, tree_bac$node.label)
# Print every label that mentions Dormibacterota.
grep("Dormibacterota", node_vector_bac, value = TRUE)
## [1] "'1.0:p__Dormibacterota; c__Dormibacteria'"
# Position of the Dormibacterota label(s) within the combined label vector.
match(
  x = grep(pattern = "Dormibacterota", x = node_vector_bac, value = TRUE),
  table = node_vector_bac
)
## [1] 1767
# MAG table joined with metagenome metadata and soil chemistry, with the
# `Bin ID` column renamed to `label` for use as tree annotation data.
NEON_MAGs_metagenomes_chemistry <- NEON_MAGs %>%
  left_join(NEON_metagenomes, by = join_by(`Sample Name`)) %>%
  left_join(
    NEON_chemistry,
    by = join_by(`Sample Name` == genomicsSampleID)
  ) %>%
  rename(label = `Bin ID`)
# Put the bacterial tree into preorder before extracting a clade from it.
tree_bac_preorder <- Preorder(tree_bac)
# Look the Dormibacterota node up by its label rather than hard-coding the
# number printed earlier (1767 for the current tree file).
# NOTE(review): the index is found in the labels of `tree_bac` but applied to
# the preordered copy -- confirm Preorder() leaves node numbering unchanged.
dormibacterota_node <- grep("Dormibacterota", node_vector_bac)
# Subtree() needs exactly one node; fail loudly if the label is missing or
# matches more than once.
stopifnot(length(dormibacterota_node) == 1L)
tree_Dormibacterota <- Subtree(tree_bac_preorder, dormibacterota_node)
# Annotation rows for the MAGs classified as Dormibacterota.
NEON_MAGs_Dormibacterota <- NEON_MAGs_metagenomes_chemistry %>%
  filter(Phylum == "Dormibacterota")
# Circular cladogram of the bacterial tree with three clades highlighted and
# labelled; each highlight/label pair shares a node number and a colour.
ggtree(tree_bac, layout = "circular", branch.length = "none") +
  geom_hilight(node = 1767, fill = "steelblue", alpha = 0.6) +
  geom_cladelab(
    node = 1767,
    label = "Dormibacterota",
    align = TRUE,
    offset = 0,
    textcolor = "steelblue",
    barcolor = "steelblue"
  ) +
  geom_hilight(node = 1789, fill = "darkgreen", alpha = 0.6) +
  geom_cladelab(
    node = 1789,
    label = "Actinomycetota",
    align = TRUE,
    vjust = -0.4,
    offset = 0,
    textcolor = "darkgreen",
    barcolor = "darkgreen"
  ) +
  geom_hilight(node = 2673, fill = "darkorange", alpha = 0.6) +
  geom_cladelab(
    node = 2673,
    label = "Acidobacteriota",
    align = TRUE,
    hjust = 1.1,
    offset = 0,
    textcolor = "darkorange",
    barcolor = "darkorange"
  )

# Copy of the annotation table in which five columns get names without spaces.
NEON_MAGs_metagenomes_chemistry_noblank <- rename(
  NEON_MAGs_metagenomes_chemistry,
  AssemblyType = `Assembly Type`,
  BinCompleteness = `Bin Completeness`,
  BinContamination = `Bin Contamination`,
  TotalNumberofBases = `Total Number of Bases`,
  EcosystemSubtype = `Ecosystem Subtype`
)
# Dormibacterota subtree with tips coloured by ecosystem subtype, plus two
# side panels showing bin completeness (points) and bin contamination (bars).
ggtree(tree_Dormibacterota) %<+%
  NEON_MAGs_metagenomes_chemistry +
  geom_tippoint(mapping = aes(colour = `Ecosystem Subtype`)) +
  # The panels use the *_noblank table: per the original author, geom_facet()
  # does not cope with blank spaces in column names (reason unknown).
  geom_facet(
    mapping = aes(x = BinCompleteness),
    data = NEON_MAGs_metagenomes_chemistry_noblank,
    geom = geom_point,
    panel = "Bin Completeness"
  ) +
  geom_facet(
    mapping = aes(x = BinContamination),
    data = NEON_MAGs_metagenomes_chemistry_noblank,
    geom = geom_col,
    panel = "Bin Contamination",
    orientation = "y",
    width = 0.6
  ) +
  theme_tree2(legend.position = c(0.1, 0.7))

# Circular Dormibacterota subtree; points are coloured by ecosystem subtype
# and sized by the total number of bases.
ggtree(tree_Dormibacterota, layout = "circular") %<+%
  NEON_MAGs_metagenomes_chemistry +
  geom_point2(
    mapping = aes(
      color = `Ecosystem Subtype`,
      size = `Total Number of Bases`
    )
  )
## Warning: Removed 21 rows containing missing values or values outside the scale range
## (`geom_point_g_gtree()`).
